# === setup === #
library(here)
source(here("GitControlled/Codes/0_1_ls_packages.R"))
source(here("GitControlled/Codes/0_0_functions.R"))
#/*--------------------------------*/
#' ## Data
#/*--------------------------------*/
# === NRD boundary === #
nrd_boud <-
here("Shared/Data/WaterAnalysis/NRD_bd/BND_NaturalResourceDistricts_DNR.shp") %>%
st_read() %>%
filter(NRD_Name %in% c("Lower Republican", "Tri-Basin")) %>%
st_transform(4269)
Reading layer `BND_NaturalResourceDistricts_DNR' from data source `/Users/shunkeikakimoto/Dropbox/ResearchProject/HeterogeneousAllocation/Shared/Data/WaterAnalysis/NRD_bd/BND_NaturalResourceDistricts_DNR.shp' using driver `ESRI Shapefile'
Simple feature collection with 23 features and 6 fields
Geometry type: POLYGON
Dimension: XY
Bounding box: xmin: -11583170 ymin: 4865934 xmax: -10609670 ymax: 5312233
Projected CRS: WGS 84 / Pseudo-Mercator
# === well are located in east side of US Highway 183? === #
east_or_west <-
here("Shared/Data/WaterAnalysis/east_or_west.rds") %>%
readRDS()
# === Data period: 2008-2015 === #
reg_data_all <-
here("Shared/Data/WaterAnalysis/comp_reg_dt.rds") %>%
readRDS() %>%
.[,owner_name := paste0(firstname, "_", lastname)] %>%
.[year %in% 2008:2015 & usage <= 45]
# east_or_west[., on = "wellid"]
# === Data period: 2008-2012 === #
reg_data <-
reg_data_all %>%
.[year %in% 2008:2012]
# === All Well Data (Original data)=== #
ir_data <-
here('Shared/Data/ir_reg.rds') %>%
readRDS() %>%
data.table() %>%
.[source %in% c('Meter','METER','metered') & nrdname %in% c("Lower Republican", "Tri-Basin")] %>%
.[,owner_name := paste0(firstname, "_", lastname)]
# .[year %in% 2008:2012]
# --- check: Low_Tri_5mi --- #
ggplot() +
geom_sf(data=nrd_boud) +
geom_sf(data=st_as_sf(ir_data, coords = c("longdd","latdd"), crs = 4269),
aes(color=factor(Low_Tri_5mi)), size=0.5)
"Lower Republican ownerA")to filter the data.outside_5mi_LR <-
ir_data %>%
.[,nrd_owner_name := paste0(nrdname, " ", owner_name) ] %>%
.[nrdname=="Lower Republican" & Low_Tri_5mi==0, nrd_owner_name] %>%
unique()
both_InOut_5mi_LR showing whether a landowner has farmlands in both inside and outside of the buffer facing LR so that we can distinguish them later.# create "both_InOut_5mi" index
reg_data <-
reg_data %>%
.[,nrd_owner_name := paste0(nrdname, " ", owner_name) ] %>%
.[, both_InOut_5mi_LR := ifelse(nrd_owner_name %in% outside_5mi_LR, 1, 0)]
# Don't need to run
reg_data %>%
unique(., by="nrd_owner_name") %>%
.[, .N, by=.(nrdname, both_InOut_5mi_LR) ] %>%
.[order(nrdname)] %>%
print()
nrdname both_InOut_5mi_LR N
1: Lower Republican 0 306
2: Lower Republican 1 81
3: Tri-Basin 0 445
both_InOut_5mi_LR=1 indicates the farmers who also owns farmland outside of the 5-mile buffer of LR side, and the number fo those farmers is 81.
both_InOut_5mi_LR to filter the data.# Remove owners who have farmlands in both inside and outside of the buffer
mod_reg_data <-
reg_data[both_InOut_5mi_LR==0,]
# - Data on TB - #
reg_data_TB <-
mod_reg_data[nrdname=="Tri-Basin",]
# - Data on LR - #
reg_data_LR <-
mod_reg_data[nrdname=="Lower Republican",]
tr (combinations of “township” and “range” index).tr is not one, if a farmer has farmland across several “range” .# Don't need to run
# Ideally, a single farmers have only one single "tr". Check how many of farmers whose farmland is scattered across multiple "tr"
tr_cl <-
reg_data_LR %>%
distinct(nrd_owner_name, tr) %>%
.[, index := 1,] %>%
dcast(.,nrd_owner_name ~ tr, value.var = "index") %>%
replace(is.na(.), 0) %>%
.[, total := rowSums(.SD), by = nrd_owner_name]
# tr_cl[total>1, ]
tr segments.# Don't need to run
county_cl <-
reg_data_LR %>%
distinct(nrd_owner_name, county) %>%
.[, index := 1,] %>%
dcast(.,nrd_owner_name ~ county, value.var = "index") %>%
replace(is.na(.), 0) %>%
.[, total := rowSums(.SD), by = nrd_owner_name]
# county_cl[total>1, nrd_owner_name]
county_cl[total>1, ] %>% print()
nrd_owner_name FRANKLIN FURNAS HARLAN KEARNEY total
1: Lower Republican David_Black 1 0 1 0 2
2: Lower Republican John & Ingrid_Tangeman 1 1 0 0 2
3: Lower Republican Wayne & Linda_Brummer 1 0 1 0 2
4: Lower Republican _Franklin County Farm 1 0 0 1 2
# Don't need to run
ne_county_sf <- tigris::counties("Nebraska", cb = TRUE, resolution="20m", progress_bar = FALSE) %>%
dplyr::select(COUNTYFP, NAME) %>%
filter(NAME %in% str_to_sentence(unique(mod_reg_data$county)))%>%
st_transform(4269)
county_cl_sf <-
reg_data_LR %>%
.[nrd_owner_name %in% county_cl[total>1, nrd_owner_name],] %>%
distinct(nrd_owner_name, longdd, latdd) %>%
st_as_sf(., coords = c("longdd","latdd"), crs = 4269)
ggplot()+
geom_sf(data=ne_county_sf, aes(fill=NAME), alpha=0.6) +
geom_sf(data=nrd_boud, color="blue", fill=NA) +
geom_sf(data=county_cl_sf, size=1)+
facet_wrap(~nrd_owner_name, ncol=2)
# reg_data_LR %>%
# .[nrd_owner_name == "Lower Republican John & Ingrid_Tangeman"] %>%
# distinct(nrd_owner_name, longdd, latdd, .keep_all = TRUE)
# reg_data_LR %>%
# .[nrd_owner_name == "Lower Republican David_Black"] %>%
# distinct(nrd_owner_name, longdd, latdd, .keep_all = TRUE)
Each panel in the figure above shows how farmland of each of those four farmers is distributed.
# reg_data_LR$county %>% unique()
reg_data_LR2 <-
reg_data_LR %>%
.[,county_fix := case_when(
# - first group - #
nrd_owner_name == "Lower Republican John & Ingrid_Tangeman" ~ "FURNAS",
nrd_owner_name == "Lower Republican David_Black" ~ "HARLAN",
# - second group - #
nrd_owner_name == "Lower Republican _Franklin County Farm" ~ "FRANKLIN",
nrd_owner_name == "Lower Republican Wayne & Linda_Brummer" ~ "HARLAN",
TRUE ~ county
)]
# county_cl2 <-
# reg_data_LR2 %>%
# distinct(nrd_owner_name, county_fix) %>%
# .[, index := 1,] %>%
# dcast(.,nrd_owner_name ~ county_fix, value.var = "index") %>%
# replace(is.na(.), 0) %>%
# .[, total := rowSums(.SD), by = nrd_owner_name]
# county_cl2[total>1, ]
reg_data_LR_mean <-
reg_data_LR2 %>%
.[,.(
usage = mean(usage),
treat2 = mean(treat2),
# --- soil --- #
silttotal_r = mean(silttotal_r),
claytotal_r = mean(claytotal_r),
slope_r = mean(slope_r),
ksat_r = mean(ksat_r),
awc_r = mean(awc_r),
# --- weather --- #
pr_in = mean(pr_in),
pet_in = mean(pet_in),
gdd_in = mean(gdd_in),
# --- tr --- #
county = unique(county_fix)
), by = .(nrd_owner_name, year)] %>%
.[,county_year := paste0(county, "_", year)]
reg_data_TB_final <-
reg_data_TB %>%
.[, names(reg_data_LR_mean) %>% .[-length(.)], with=FALSE] %>%
.[,county_year := paste0(county, "_", year)]
reg_data_final <- rbind(reg_data_LR_mean, reg_data_TB_final)
# hist(reg_data_LR_mean$usage)
cov_ls[selected_vars] %>% print()
[1] "silttotal_r" "awc_r"
county_yearvis_cf_raw_cl
vis_cf_res
usage is the average amount of water applied to the field (unit is inches).usage by owneracres).| Landowners categorized by the number of farmlands owned |
|
|---|---|
| num_fields | num_farmers |
| 1 | 162 |
| 2 | 79 |
| 3 | 29 |
| 4 | 12 |
| 5 | 8 |
| 6 | 7 |
| 7 | 3 |
| 8 | 1 |
| 9 | 1 |
| 10 | 3 |
| 14 | 1 |
usage:=volaf*12/acres
volaf: is a measurement of the volume of water used on the farmland (unit: acre-foot)volaf*12: the measurement unit acre-inches
volaf*12 by acres, it becomes volume of water inches (per acre) (acre-inches by acre)
usage is inchesvolaf*12)/(sum of acres)ir_reg.rds) has two columns acres of fields : i.acres and acres.temp <- reg_data_LR2[1]
temp$usage == temp$volaf*12/temp$i.acres
[1] FALSE
temp$usage == temp$volaf*12/temp$acres
[1] TRUE
acres is the correct one.cov_ls <- c(
# --- weather --- #
"pr_in","pet_in",
# --- soil --- #
"silttotal_r", "claytotal_r", "slope_r", "ksat_r", "awc_r"
)
var_ls <- c("usage", "treat2", cov_ls)
# === LR data === #
reg_data_LR_mean <-
reg_data_LR2 %>%
.[,.(
usage_avg = mean(usage),
usage = sum(volaf*12)/sum(acres),
treat2 = mean(treat2),
# --- soil --- #
silttotal_r = mean(silttotal_r),
claytotal_r = mean(claytotal_r),
slope_r = mean(slope_r),
ksat_r = mean(ksat_r),
awc_r = mean(awc_r),
# --- weather --- #
pr_in = mean(pr_in),
pet_in = mean(pet_in),
gdd_in = mean(gdd_in),
# --- tr --- #
county = unique(county_fix)
), by = .(nrd_owner_name, year)] %>%
.[,county_year := paste0(county, "_", year)]
# === TB data === #
reg_data_TB_final <-
reg_data_TB %>%
.[,county_year := paste0(county, "_", year)]
# === Merge LR and TB data === #
reg_data_final2 <-
rbind(
reg_data_LR_mean[, c(var_ls, "county_year"), with=FALSE],
reg_data_TB_final[, c(var_ls, "county_year"),
with=FALSE]
)
usage_avg = mean(usage) and usage = sum(volaf*12)/sum(acres) + Green is
sum(volaf*12)/sum(acres) and Blue is mean(usage) + Overall, sum(volaf*12)/sum(acres) is slightly smaller than mean(usage)
data <- reg_data_final2
Y <- data[, usage]
T <- data[, treat2]
X <- data[, ..cov_ls]
cl_var <- data[, county_year] %>% factor()
#/*--------------------------------*/
#' ## 1st CF
#/*--------------------------------*/
set.seed(23456)
forest_W_cl <- regression_forest(
X, T,
clusters = cl_var,
num.trees = 4000)
W_hat <- predict(forest_W_cl)$predictions
forest_Y_cl <- regression_forest(
X, Y,
clusters = cl_var,
num.trees = 4000)
Y_hat <- predict(forest_Y_cl)$predictions
cf_raw_cl <- causal_forest(
X=X,
Y=Y,
W=T,
Y.hat = Y_hat,
W.hat = W_hat,
clusters = cl_var,
tune.parameters = "all",
num.trees = 4000
)
varimp = variable_importance(cf_raw_cl)
selected_vars = which(varimp > mean (varimp))
# selected_vars = which(varimp/mean(varimp) > 0.2)
se_cov <- data[,cov_ls[selected_vars], with=FALSE]
vis_cf_raw_cl <- gen_impact_viz(
cf_res= cf_raw_cl,
data_base=data,
treat_var='treat2',
var_ls= cov_ls,
var_ls_int = cov_ls
)
# vis_cf_raw_cl
#/*--------------------------------*/
#' ## 2nd CF
#/*--------------------------------*/
cf_res <- causal_forest(
X=se_cov,
Y=Y,
W=T,
Y.hat = Y_hat,
W.hat = W_hat,
clusters = cl_var,
# sample.fraction = 0.4,
num.trees = 4000,
tune.parameters = "all",
# tune.parameters = c(""),
)
#/*--------------------------------*/
#' ## Visualization
#/*--------------------------------*/
vis_cf_res <-
gen_impact_viz(
cf_res= cf_res,
data_base=data,
treat_var='treat2',
var_ls= cov_ls[selected_vars],
var_ls_int = cov_ls[selected_vars]
)
# vis_cf_res
vis_cf_raw_cl
vis_cf_res